# Load the cleaned border-crossing data from the author's local copy.
# NOTE(review): presumably this .Rdata file supplies the `border` object used
# further down -- confirm. The path is machine-specific.
# Alternative location (kept for reference):
# load("C:/Users/rebek/github/packages/borderxing/data/border_clean.Rdata")
load(file = "~/Documents/ISU/23Fall/STAT579/final project/data/border_clean.Rdata")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
#ggplotly()
# Keep only the US-Canada crossings from `border` and derive time columns:
#   Year        - factor with one level per calendar year, 1996 through 2023
#   date        - `Date` parsed by lubridate::my() (month-year order)
#   month, year - components of `date`, used for the x-axes of later plots
Canada_border <- border %>%
  filter(Border == "US-Canada Border") %>%
  mutate(
    Year = factor(Year, levels = 1996:2023),
    date = lubridate::my(Date),
    month = month(date),
    year = year(date)
  )
# Port codes: How many ports have a complete set of records? How many ports
# should we consider? Does Port_name change over time? There are 118 factor
# levels but only 90 rows, and only 89 rows after group_by(Port_name).
# Eastport: number of rows recorded under each port code.
Canada_border %>%
  filter(Port_name == "Eastport") %>%
  group_by(Port_code) %>%
  tally()
## # A tibble: 0 × 2
## # ℹ 2 variables: Port_code <fct>, n <int>
#Canada_border %>%
#filter(Port_name == 'Eastport')%>%
#group_by(Port_code)%>%
#ggplot(aes(x = Year,y = Value,colour = Port_code))+geom_point()
#Canada_border %>%
#filter(Port_name == 'Eastport')%>%
#group_by(Port_code)%>%
#ggplot(aes(x = Year,y = Value,colour = Port_code))+geom_boxplot()
# Value over time (year plus fractional month), one panel per state:
# first with a shared y-axis, then with an independent y-axis per panel.
# NOTE(review): group_by() does not change how ggplot2 draws the lines. If a
# state has several rows for the same month, geom_line() joins all of them
# into one path -- TODO confirm whether aggregating first is intended.
state_lines <- Canada_border %>%
  group_by(State) %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()

state_lines + facet_wrap(~State)
state_lines + facet_wrap(~State, scales = "free_y")
# Measure: Value over time, one panel per measure, drawn once with a shared
# y-axis and once with a free y-axis per panel.
measure_lines <- Canada_border %>%
  group_by(Measure) %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()

measure_lines + facet_wrap(~Measure)
measure_lines + facet_wrap(~Measure, scales = "free_y")
# We can see some seasonal patterns in each measure.
# Buses: Value over time for the "Buses" measure, one panel per state.
bus_lines <- Canada_border %>%
  filter(Measure == "Buses") %>%
  group_by(State) %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()

# Shared y-axis across states.
bus_lines + facet_wrap(~State)
# Free y-axis per state (the original note here singles out Montana's buses).
bus_lines + facet_wrap(~State, scales = "free_y")
# Personal Vehicles: Value over time for that measure, one panel per state,
# with a shared y-axis and then a free y-axis per panel.
vehicle_lines <- Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  group_by(State) %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()

vehicle_lines + facet_wrap(~State)
vehicle_lines + facet_wrap(~State, scales = "free_y")
# Personal Vehicle Passengers: Value over time for that measure, one panel
# per state, with a shared y-axis and then a free y-axis per panel.
passenger_lines <- Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  group_by(State) %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()

passenger_lines + facet_wrap(~State)
passenger_lines + facet_wrap(~State, scales = "free_y")
# I think Personal Vehicles is a contributing factor.
# Build the per-state, free-y line plot for whatever rows are passed in.
plot_states_free_y <- function(data) {
  data %>%
    group_by(State) %>%
    ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
    geom_line() +
    facet_wrap(~State, scales = "free_y")
}

# All measures together, then one plot per selected measure.
Among_State <- plot_states_free_y(Canada_border)

Bus_Among_State <- Canada_border %>%
  filter(Measure == "Buses") %>%
  plot_states_free_y()

Personal_Vehicles_Among_State <- Canada_border %>%
  filter(Measure == "Personal Vehicles") %>%
  plot_states_free_y()

Personal_Vehicles_Passengers_Among_State <- Canada_border %>%
  filter(Measure == "Personal Vehicle Passengers") %>%
  plot_states_free_y()

# Stack the all-measures plot above each single-measure plot for comparison.
grid.arrange(Among_State, Bus_Among_State, nrow = 2)
grid.arrange(Among_State, Personal_Vehicles_Among_State, nrow = 2)
grid.arrange(Among_State, Personal_Vehicles_Passengers_Among_State, nrow = 2)
# Mean Value for each (Year, Month) pair, drawn as one line per year across
# the months. Missing Values are ignored when averaging.
Canada_border %>%
  group_by(Year, Month) %>%
  summarise(
    # TRUE rather than T: T is an ordinary variable and can be reassigned.
    average_value = mean(Value, na.rm = TRUE),
    # Drop the grouping explicitly; this also silences the summarise() message.
    .groups = "drop"
  ) %>%
  ggplot(
    aes(x = factor(Month), y = average_value, group = Year, colour = Year)
  ) +
  geom_line()
# Same monthly means as a small-multiples view: one panel per year, with the
# x-axis title, labels and ticks hidden to keep the panels compact.
Canada_border %>%
  group_by(Year, Month) %>%
  summarise(
    # TRUE rather than T: T is an ordinary variable and can be reassigned.
    average_value = mean(Value, na.rm = TRUE),
    # Drop the grouping explicitly; this also silences the summarise() message.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = factor(Month), y = average_value, colour = Year)) +
  geom_line(aes(group = Year)) +
  facet_wrap(~Year) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Montana: Value over time for the "Buses" measure only.
Canada_border %>%
  filter(Measure == "Buses", State == "Montana") %>%
  group_by(State) %>%
  ggplot(aes(x = year + (month - 1) / 12, y = Value)) +
  geom_line()
# Nov 2000 has a value of 124; I think this is because of the election
# (Nov 7, 2000).
# I found that the dates are not sorted; I think we should sort them before
# analyzing the plot.
# Average Value per calendar month within four consecutive 7-year windows,
# drawn as one coloured line per window.

# Mean Value by Month over the rows of `data` whose Year is one of `years`.
# Year is compared as text so the match works on the factor column.
monthly_average <- function(data, years) {
  data %>%
    filter(Year %in% as.character(years)) %>%
    group_by(Month) %>%
    summarise(average_value = mean(Value, na.rm = TRUE))
}

Group_1 <- monthly_average(Canada_border, 1996:2002)
Group_2 <- monthly_average(Canada_border, 2003:2009)
Group_3 <- monthly_average(Canada_border, 2010:2016)
Group_4 <- monthly_average(Canada_border, 2017:2023)

# Stack the four summaries, labelling each row with its window so that a
# single geom_line() layer can colour by window.
year_group_averages <- bind_rows(
  list(
    "1996-2002" = Group_1,
    "2003-2009" = Group_2,
    "2010-2016" = Group_3,
    "2017-2023" = Group_4
  ),
  .id = "year_group"
)

ggplot(
  year_group_averages,
  aes(x = Month, y = average_value, group = year_group, colour = year_group)
) +
  geom_line() +
  scale_color_manual(
    name = "Year Group",
    values = c(
      "1996-2002" = "steelblue",
      "2003-2009" = "coral2",
      "2010-2016" = "green",
      "2017-2023" = "orange"
    )
  )